{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 03INFER.ipynb\n",
    "\n",
    "The `InferSet` inference dataset class, which transforms ROI features with an LLE-like method.\n",
    "\n",
    "06/13/2020"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class InferSet(Dataset):\n",
    "    \"\"\"Inference dataset that yields one query, with all of its candidate rows, per item.\n",
    "\n",
    "    The ROI features of a query's rows are pooled, each vector is blended with its\n",
    "    two nearest neighbours in that pool (an LLE-like smoothing), and the result is\n",
    "    split back per row before padding.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame with one row per candidate; the columns read here are\n",
    "            'query_id', 'query_tokenid', 'features' and 'product_id'.\n",
    "    \"\"\"\n",
    "\n",
    "    # Number of neighbours requested per ROI vector. Column 0 of the result is\n",
    "    # expected to be the vector itself; only columns 1 and 2 are blended in.\n",
    "    N_NEIGHBORS = 5\n",
    "    # Weights for (the vector itself, nearest neighbour, second-nearest neighbour).\n",
    "    BLEND_WEIGHTS = (0.7, 0.2, 0.1)\n",
    "\n",
    "    def __init__(self, df):\n",
    "        super().__init__()\n",
    "        self.df = df\n",
    "        self.qids = df['query_id'].unique().tolist()\n",
    "\n",
    "    def __len__(self):\n",
    "        \"\"\"Return the number of distinct query ids.\"\"\"\n",
    "        return len(self.qids)\n",
    "\n",
    "    def get_row_features(self, rows):\n",
    "        \"\"\"Smooth the pooled ROI features of `rows` and split them back per row.\n",
    "\n",
    "        Args:\n",
    "            rows: DataFrame whose 'features' column holds one 2-D array per row\n",
    "                (ROIs along axis 0, same feature width in every row).\n",
    "\n",
    "        Returns:\n",
    "            list with one array per row of `rows`, each shaped like that row's\n",
    "            input 'features' array. When the pool holds fewer than three ROI\n",
    "            vectors there are not two neighbours to blend with, so the features\n",
    "            are returned unsmoothed.\n",
    "        \"\"\"\n",
    "        feats = rows['features'].tolist()\n",
    "        # idxs[i]:idxs[i+1] is the slice of the pooled matrix that belongs to row i.\n",
    "        idxs = np.cumsum([0] + [f.shape[0] for f in feats])\n",
    "        rois = np.vstack(feats)\n",
    "\n",
    "        n_samples = rois.shape[0]\n",
    "        if n_samples >= 3:\n",
    "            from sklearn.neighbors import NearestNeighbors\n",
    "            # kneighbors raises when n_neighbors exceeds the number of fitted samples.\n",
    "            k = min(self.N_NEIGHBORS, n_samples)\n",
    "            nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(rois)\n",
    "            _, indices = nbrs.kneighbors(rois)\n",
    "            w_self, w_first, w_second = self.BLEND_WEIGHTS\n",
    "            # Index with the 1-D neighbour arrays directly. Wrapping them in a list\n",
    "            # (rois[[i]]) is read by NumPy >= 1.23 as a (1, N) index array, which adds\n",
    "            # a leading axis and breaks the per-row slicing below.\n",
    "            rois = w_self * rois + w_first * rois[indices[:, 1]] + \\\n",
    "                                w_second * rois[indices[:, 2]]\n",
    "        return [rois[idxs[i]:idxs[i+1], :] for i in range(len(feats))]\n",
    "\n",
    "    def __getitem__(self, idx):\n",
    "        \"\"\"Build the model inputs for the idx-th query.\n",
    "\n",
    "        Returns:\n",
    "            tuple (tokens, attn1, rois, attn2, query_id, product_ids): tokens and\n",
    "            attn1 are the pad_token output for the query's token ids; rois and\n",
    "            attn2 stack the pad_rois output of every candidate row along a new\n",
    "            leading axis; product_ids is the rows' 'product_id' column as an array.\n",
    "        \"\"\"\n",
    "        qid = self.qids[idx]\n",
    "        # drop=True: the old index is not needed, and inserting it as a column\n",
    "        # fails if the frame already has a column named 'index'.\n",
    "        rows = self.df[self.df['query_id']==qid].reset_index(drop=True)\n",
    "\n",
    "        first_row = rows.iloc[0]\n",
    "        tokens, attn1 = pad_token(first_row['query_tokenid'])\n",
    "\n",
    "        rois_list = []\n",
    "        attn2_list = []\n",
    "        # LLE-like method transformation; one feature array per row, in row order.\n",
    "        for feats in self.get_row_features(rows):\n",
    "            rois, attn2 = pad_rois(feats)\n",
    "            rois_list.append(rois[None,:])\n",
    "            attn2_list.append(attn2[None,:])\n",
    "        rois = np.vstack(rois_list)\n",
    "        attn2 = np.vstack(attn2_list)\n",
    "        return tokens, attn1, rois, attn2, first_row['query_id'], rows['product_id'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
